# Overview #####################################################################
# Replication Script for "Why Join? How Civil Society Organizations' Attributes 
# Signal Congruence and Impact Community Engagement"
# author: Simon Hoellerbauer
# contact: hoellerbauers@gmail.com
# date created: August 30, 2021
# date last edited: September 13, 2021
################################################################################

# This document includes all of the quantitative analysis for 
# "Why Join? How Civil Society Organizations' Attributes Signal Congruence and 
# Impact Community Engagement"

# Set Up #######################################################################

#setting working directory 
## User must change wd_path to the location on their computer where the two data
## files are saved
wd_path <- 'must/change/to/data/files/location'
setwd(wd_path)

#load necessary packages
#package version used for final paper analysis noted here
library(cregg) # for subgroup analysis of conjoints; version 0.4.0
library(plyr) # for data manipulation; version 1.8.6
library(stargazer) #for producing model tables in LaTeX; version 5.2.2
library(labelled) #for dealing with labeled data even better; version 2.8.0
library(multiwayvcov) # for clustered standard errors; version 1.2.3
library(FindIt) # for causal ANOVA; version 1.2.1
library(tidyverse) # for easier data manipulation; version 1.3.1
library(xtable) #for making some LaTeX tables; version 1.8.4

# R Version for final paper analysis: 4.1.1
# RStudio Version for final paper analysis: 1.4.1717

# if you have a different versions of R, RStudio or these packages installed
# your output may not match the output in the paper exactly - however, there
# should be no drastic differences.

# Data #########################################################################

## Reading in data
load("org_level.RData")
load("vendor_long.RData")

# Convenience Functions ########################################################

#function to calculate average predicted effect
avg.pred.fun.gen <- function(model, invlink, varnames, values, 
                             v_cov = NULL, samp_coef = NULL, data = NULL,
                             nsim = 1000){
  
  if(is.null(v_cov)) v_cov <- vcov(model)
  
  if(class(model) == "glm.cluster" | class(model) == "lm.cluster") model <- model$glm_res
  
  if(is.null(data)) data <- model$data
  
  if(is.null(samp_coef)) samp_coef <- t(mvrnorm(nsim, coef(model), v_cov))
  
  hyp_data <- do.call(expand.grid, values)
  
  if(class(model) == "lm") form <- terms(model) else form <- model$formula
  
  avg_preds <- adply(hyp_data,1,function(x){

    data[,varnames] <- x
    des_mat <- model.matrix(form, data = data)
    all_preds <- invlink(des_mat %*% samp_coef)
    colMeans(all_preds)
  })
}

# function to calculate differences (or differences in differences) in 
# probabilities for conjoint
pred.diff <- function(avg_preds, base_level){
  
  l <- ncol(avg_preds)
  
  diffs <- apply(avg_preds[,2:l], 1, function(x){
       return(x - as.matrix(avg_preds[base_level, 2:l]))
      })
}

# function to get average predicted difference in 
# effect for one conjoint attr using avg.pred.fun.gen and using pred.diffs
avg.pred.conj.sin_at <- function(model, invlink, varname, values, probs, 
                             v_cov = NULL, samp_coef = NULL, diff = T,
                             diff_of_diff = F,
                             base_level = NULL, data = NULL,
                             nsim = 1000){
  
  if(is.null(base_level)) {
      base_level <- 1
  } else if (base_level == "max"){
      
    
    }
  
  if(diff){
    
   if(diff_of_diff){
      
      diffs1 <- pred.diff(avg.pred.fun.gen(model = model[[1]], invlink = invlink, 
                                           varnames = varname,
                                  values = values, v_cov = v_cov[[1]], 
                                  samp_coef = samp_coef, 
                                  data = data, nsim = nsim), base_level)
    
      diffs2 <- pred.diff(avg.pred.fun.gen(model = model[[2]], invlink = invlink, 
                                           varnames = varname,
                                  values = values, v_cov = v_cov[[2]], 
                                  samp_coef = samp_coef, 
                                  data = data, nsim = nsim), base_level)
      
      diffs <- diffs1 - diffs2
   } else{
     
         diffs <- pred.diff(avg.pred.fun.gen(model = model, varnames = varname,
                                  invlink = invlink, 
                                  values = values, v_cov = v_cov, 
                                  samp_coef = samp_coef, 
                                  data = data, nsim = nsim), base_level)
     
   }
  } else{
      
      if(diff_of_diff){
        
          diffs1 <- avg.pred.fun.gen(model = model[[1]], invlink = invlink, 
                                           varnames = varname,
                                  values = values, v_cov = v_cov[[1]], 
                                  samp_coef = samp_coef, 
                                  data = data, nsim = nsim)[, -1]
        
          diffs2 <- avg.pred.fun.gen(model = model[[2]], invlink = invlink, 
                                           varnames = varname,
                                  values = values, v_cov = v_cov[[2]], 
                                  samp_coef = samp_coef, 
                                  data = data, nsim = nsim)[, -1]
        
          diffs <- t(diffs1 - diffs2)
        
      } else {
        
          diffs <- t(avg.pred.fun.gen(model = model, invlink = invlink, 
                                    varnames = varname,
                                values = values, v_cov = v_cov, 
                                samp_coef = samp_coef, nsim = nsim)[, -1])
      }
  }
  
  diffs <- data.frame(values[[1]], 
                   t(apply(diffs, 2, quantile, probs = probs)))
    
  names(diffs) <- c("levels", "lo", "median", "hi")
    
  return(diffs)  

}

# function to combine together table of all attribute-levels
avg.pred.conj.all_ats <- function(model, invlink, varnames, values, probs, 
                             v_cov = NULL, samp_coef = NULL, diff = T,
                             diff_of_diff = F,
                             base_level = NULL, data = NULL,
                             nsim = 2000)
  {
    
    map2_dfr(varnames, values, function(x, y){
      
      at_levels <- avg.pred.conj.sin_at(model = model, invlink = invlink, varname = x, 
                           values = y, probs = probs, 
                             v_cov = v_cov, samp_coef = samp_coef, diff = diff,
                             diff_of_diff = diff_of_diff,
                             base_level = base_level, data = data,
                             nsim = nsim)
      data.frame(attribute = x, at_levels)
      
    })
  
}

# function that produces a basic plot from conjoint data
plot.conjoint <- function(data, lev_labs = NULL)
  {
    atts <- unique(data$attribute) 
    data <- split(data, data$attribute)
    ##Note that split organizes the split objects by the levels of the factor
    ##used to split them. To avoid this, would need to do something like
    ##data <- data[attributes], like in get_amces below
    
    data <- data[atts]
    
    data <- map_dfr(data, function(x){
      
      x$levels <- as.character(x$levels)
      
      attribute <- unique(x$attribute)
      
      rbind(c(attribute = attribute, 
              levels = paste0("(", toupper(attribute), ")"), 
              lo = NA, median = NA, hi = NA), x)
    })
    
    #because rbind or mapdfr converts to character
    data[, 3:5] <- select(data, 3:5) %>% mutate_if(is.character, as.numeric) 
    
    if(is.character(lev_labs)) data$levels <- lev_labs  
    
    data$levels <- factor(data$levels, levels = data$levels)
    
    ggplot(data = data) + 
      geom_point(aes(x = median, y = levels), size = 1.25) +
      geom_errorbarh(aes(y = levels, xmin = lo, xmax = hi)) +
      scale_y_discrete("Attribute-Levels", 
                       limits =  rev(levels(data$levels)), drop = FALSE) + 
      theme_bw() + 
      theme(panel.border = element_blank(), 
            panel.grid.major = element_blank(), 
            panel.grid.minor = element_blank(), 
            axis.line = element_line(colour = "black")) +
      geom_vline(aes(xintercept = 0)) + 
      xlab("Change in Pr(Organization Selected)")
    
}

#for plotting effects side by side; new function created that is very similar to
#plot.conjoint in order to not affect code for now
plot.conjoint2 <- function(data, lev_labs = NULL, Group, int = 0)
  {
    atts <- unique(data$attribute)  
  
    data <- split(data, data$attribute)
    ##Note that split organizes the split objects by the levels of the factor
    ##used to split them. To avoid this, would need to do something like
    ##data <- data[attributes], like in get_amces below
    
    data <- data[atts]
    
    data <- map_dfr(data, function(x){
      
      x$levels <- as.character(x$levels)
      
      attribute <- unique(x$attribute)
      
      rbind(c(attribute = attribute, 
              levels = paste0("(", toupper(attribute), ")"), 
              lo = NA, median = NA, hi = NA, Group = Group), x)
    })
    
    #because rbind or mapdfr converts to character
    data[, 3:5] <- select(data, 3:5) %>% mutate_if(is.character, as.numeric) 
    
    if(is.character(lev_labs)) data$levels <- lev_labs  
    
    data$levels <- factor(data$levels, levels = unique(data$levels))
    
    ggplot(data = data) + 
         geom_point(aes(x = median, y = levels,
                        shape = Group), 
                    size = 1.25, 
                    position = position_dodge(width = .75)) +
         geom_errorbarh(aes(y = levels, xmin = lo, xmax = hi,
                            linetype = Group), 
                        position = position_dodge(width = .75)) +
      scale_y_discrete("Attribute-Levels", 
                       limits =  rev(levels(data$levels)), drop = FALSE) + 
      theme_bw() + 
      theme(panel.border = element_blank(), 
            panel.grid.major = element_blank(), 
            panel.grid.minor = element_blank(), 
            axis.line = element_line(colour = "black"),
            legend.position = "bottom") +
      geom_vline(aes(xintercept = int)) + 
      xlab("Change in Pr(Organization Selected)")
    
  }


#default wrapper for work flow for logit model portion of project
plot.conjoint.default <- function(model, cluster = "resp_id", 
                                  v_cov = cluster.vcov(model, model$data[, cluster]), 
                                  diff = TRUE, diff_of_diff = FALSE, data = NULL,
                                  invlink = invlogit){
  
  effects_frame <- avg.pred.conj.all_ats(model, invlink,
                        c("capital", "leader_frmr_prof", "funding", "party"),
                        list(list(capital = levels(org_level$capital)),
                             list(leader_frmr_prof = levels(org_level$leader_frmr_prof)),
                             list(funding = levels(org_level$funding)),
                             list(party = levels(org_level$party))),
                        probs = c(0.025, 0.5, 0.975),
                        diff = diff,
                        diff_of_diff = diff_of_diff,
                        v_cov = v_cov,
                        data = data)
  
  effects_plot <- effects_frame %>% 
    plot.conjoint(lev_labs = lev_labs)
  
  return(list(effects = effects_frame, plot = effects_plot))
} 

#function to get se from cluster model (makes stargazer tables easier)
calc.ses <- function(model, keep = NULL){
  
  if(is.null(keep)){
    vcov_mat <- vcov(model)
  } else vcov_mat <- vcov(model)[keep, keep]
  
  return(sqrt(diag(vcov_mat)))
}

# function to get clustered standard errors from a regular model object
# useful when not using lm.cluster or glm.cluster
calc.ses.cluster <- function(model, cluster = "resp_id", keep = NULL, data = NULL){
  
  if(is.null(data)) cluster <- model$data[, cluster] else cluster <- data[, cluster]
  
  vcov_mat <- cluster.vcov(model, cluster)
  
  return(sqrt(diag(vcov_mat)))
}
  
#function to return list of cluster robust vcovs from list of models
get.vcovs <- function(models_list, cluster = "resp_id"){
  map(models_list, function(x) cluster.vcov(x, x$data[, cluster]))
}

#function to negate %in%
'%nin%' <- Negate('%in%')

#function to format AMCEs into a table for easy plotting (for linear prob mod)
get_amces <- function(lm, resp_id, keep, lev_labs, att_names, att_lengths,
                      alpha = 0.05){
  att_l1 <- att_lengths - 1
  atts <- unlist(map2(att_names, att_l1, function(.x, .y){
    rep(.x, .y)
  }))
  se <- cluster.vcov(lm, resp_id) %>% diag %>% sqrt
  cv <- qt(1-alpha/2, length(unique(resp_id)) - length(coef(lm)))
  amces <- data.frame(attribute = NA,
                      levels = names(coef(lm)), 
                      mean = coef(lm),
                      low = coef(lm) - cv * se,
                      high = coef(lm) + cv * se)
  if(is.character(keep)) amces <- filter(amces, levels %in% keep)
  if(is.logical(keep)) amces <- filter(amces, keep)
  if(is.numeric(keep)) amces <- amces[keep,]
  amces$attribute <- atts
  #filling in 0s and NAs
  amces <- split(amces, amces$attribute)
  
  amces <- amces[att_names]
  
  amces <- map_dfr(amces, function(.x){
    header <- c(attribute = unique(.x$attribute),
                levels = NA,
                mean = NA,
                low = NA,
                high = NA)
    base_cat <- c(attribute = unique(.x$attribut),
                  levels = NA, 
                  mean = 0,
                  low = NA,
                  high = NA)
    rbind(header, base_cat, .x)
  })
  
  #because map_dfr converts to character 
  amces[, 3:5] <- select(amces, 3:5) %>% mutate_if(is.character, as.numeric)
  
  #now to create atts_levels vector
  atts_levels <- unlist(map2(att_names, lev_labs, function(.x, .y){
    
    c(paste0("(", toupper(.x), ")"), unlist(.y))
    
  }))
  
  amces$levels <- factor(atts_levels, levels = atts_levels)
  
  amces
  
}



#function that returns an AMCE Plot
plot_amces <- function(amces, title, labx, compare = F){
     amce_plot <- ggplot(data = amces) + 

      scale_y_discrete("Attribute-Levels", 
                       limits =  rev(levels(amces$levels)), drop = FALSE) + 
      theme_bw() + 
      theme(panel.border = element_blank(), 
            panel.grid.major = element_blank(), 
            panel.grid.minor = element_blank(), 
            axis.line = element_line(colour = "black"),
            legend.position = "bottom") +
      geom_vline(aes(xintercept = 0)) + 
      xlab(paste0("Change in Pr(",labx, ")")) + labs(title = title)
  
     if(compare){
       amce_plot <- amce_plot + 
         geom_point(aes(x = mean, y = levels,
                        shape = Group), 
                    size = 1.25, 
                    position = position_dodge(width = .75)) +
         geom_errorbarh(aes(y = levels, xmin = low, xmax = high,
                            linetype = Group), 
                        position = position_dodge(width = .75))
         
     } else {
       amce_plot <- amce_plot + geom_point(aes(x = mean, y = levels), 
                                           size = 1.25) +
         geom_errorbarh(aes(y = levels, xmin = low, xmax = high))
     }
     
     amce_plot
     
}

#function to change features and levels of a cregg object
update_labels <- function(object, lev_labs, feat_labs, att_lengths,
                          BY = F, BY_labs = NULL, BY_name = NULL,
                          keep = 1:nrow(object),
                          old_class = class(object)){
  old_class #evaluate here otherwise class changed before old_class called

  if(BY){
    if(!is.null(BY_labs)){
      object$BY <- factor(rep(BY_labs, each = sum(att_lengths)), 
                          levels = BY_labs)
    }
    
    if(!is.null(BY_name)){
      names(object)[names(object) == "BY"] <- BY_name
    }
    
    lev_labs <- c(lev_labs, lev_labs)
    att_lengths <- c(att_lengths, att_lengths)
    feat_labs <- c(feat_labs, feat_labs)
  }
  
  object <- as.data.frame(object)
  
  object$level <- factor(lev_labs, levels = unique(lev_labs))
  
  object$feature <- factor(unlist(map2(feat_labs, att_lengths, function(.x, .y){
    rep(.x, .y)
  })), levels = unique(feat_labs))
  
  object <- object[keep, ]
  
  class(object) <- old_class
  
  object
}

# Necessary Objects ############################################################

# These objects are necessary for plotting that occurs later. They are reused 
# often while making plots and tables, which is why I define them here.

#setting labels for conjoint plot
lev_labs <- c(toupper("Organization Founded:"),"District Capital", "Lilongwe",
              "Capital of South Africa","Western Donor Capital", 
              toupper("Leader Used To Be:"), "Vendor","Carpenter", "Laborer",
              "Business Owner", "Bureaucrat", "Politician", 
              toupper("Funding From:"), "Citizen Contributions", 
              "Malawian Government","South African Government", 
              "Chinese Government", "Western Government",  
              toupper("Party Affiliation:"), "Independent", "Connected")

#for side by side plots
lev_labs2 <- c(toupper("Organization Founded:"), "District Capital", "Lilongwe",
              "Capital of South Africa","Western Donor Capital", 
              "District Capital", "Lilongwe",
              "Capital of South Africa","Western Donor Capital", 
               toupper("Leader Used To Be:"), "Vendor","Carpenter", "Laborer",
              "Business Owner", "Bureaucrat", "Politician", 
              "Vendor","Carpenter", "Laborer",
              "Business Owner", "Bureaucrat", "Politician",   
               toupper("Funding From:"), "Citizen Contributions", 
              "Malawian Government","South African Government", 
              "Chinese Government", "Western Government", 
              "Citizen Contributions", 
              "Malawian Government","South African Government", 
              "Chinese Government", "Western Government",
               toupper("Party Affiliation:"), "Independent", "Connected",
              "Independent", "Connected")

#for linear prob model AMCE plots
lev_labs_ols <- list(c("District Capital", "Lilongwe",
              "Capital of South Africa","Western Donor Capital"),
              c("Vendor","Carpenter", "Laborer",
              "Business Owner", "Bureaucrat", "Politician"),
                  c("Citizen Contributions", 
              "Malawian Government","South African Government", 
              "Chinese Government", "Western Government"),
                  c("Independent", "Connected"))

# full names of attributes
att_names <- c("Organization Founded:", "Leader Used To Be:", "Funding From:", 
               "Party Affiliation:")

# vector of how many levels within each attribute
att_lengths <- c(4, 6, 5, 2)

# laberl for regression tables
regress_tab_labels <- c("Intercept", 
                        "Org. Founded: Lilongwe",
                        "Org. Founded: Capital of South Africa",
                        "Org. Founded: Western Donor Capital",
                        "Leader Former: Carpenter",
                        "Leader Former: Laborer",
                        "Leader Former: Business Owner",
                        "Leader Former: Bureaucrat",
                        "Leader Former: Politican",
                        "Funding From: Malawian Government",
                        "Funding From: South African Government",
                        "Funding From: Chinese Government",
                        "Funding From: Western Government",
                        "Org. Connected to Pol. Party")


# Main Paper ###################################################################

## Experimental Design #########################################################

#number of vendors
nrow(vendor_long)

#number of markets
length(unique(vendor_long$market))

## Results #####################################################################

#Analysis and Plot for Figure 1

#model
meeting_ols1 <- lm(data = org_level, meeting_yn ~ capital +
                                                   leader_frmr_prof +
                                                   funding +
                                                   party)

#plot
plot_amces(get_amces(meeting_ols1, org_level$resp_id, 2:14,
                     lev_labs_ols, att_names, att_lengths),
           title = "Effects of Attributes on Self-Reported Engagement",
           labx = "Organization Selected")


# Supplementary Appendix #######################################################

## Appendix A: Detailed Experimental Design Information #####

### Context of the Survey ####

#### Sample ####

#Footnote 5 - median income in sample:
median(vendor_long$hh_income_trim_99, na.rm = T)

### Experimental Design ####

#### Analysis Sample Size ####

#Number of respondents completing the survey:
nrow(vendor_long)

#Ideal world total number of observations in profile-level data:
nrow(vendor_long) * 4

#Actual number of completed responses for two outcome questions:
#meeting
sum(complete.cases(org_level[, c("meeting", "capital", "leader_frmr_prof",
                                 "funding", "party")]))
#scandal
sum(complete.cases(org_level[, c("scandal", "capital", "leader_frmr_prof",
                                 "funding", "party")]))


## Appendix C: Scandal and Meeting Question Results Compared ####

#Different responses to meeting and scandal questions (paragraph 1):
org_level$same_org <- org_level$meeting_yn == org_level$scandal_ny
xtabs(~same_org, org_level) %>% prop.table()


# Scandal model:
scandal_ols1 <- lm(data = org_level, scandal_ny ~ capital +
                                                   leader_frmr_prof +
                                                   funding +
                                                   party)


#Table C1:
#LaTeX for table in paper
stargazer(meeting_ols1,
          scandal_ols1,
          se = list(calc.ses.cluster(meeting_ols1, data = org_level),
                    calc.ses.cluster(scandal_ols1, data = org_level)),
          header = FALSE,
          t.auto = TRUE,
          keep = 0:14,
          intercept.bottom = FALSE,
          covariate.labels = regress_tab_labels,
          star.cutoffs = c(.05, NA, NA),
          notes = c("*: $p<0.05$"),
          single.row = T,
          font.size = "footnotesize",
          label = "app:tab:models_ols",
          title = "\\footnotesize Linear Probability Models Used For Figure 1 in Main Text. Baseline for `Founded' variable is \\textit{District Capital}. Baseline for `Leader's Former Profession' variable is \\textit{Vendor}. Baseline for `Organization's Funding Is From' variable is \\textit{Contributions from Malawian Citizens}. Baseline for `Organization's Party Connections' variable is \\textit{Independent}.")

#Figure C1:
#binding together amces for both models
amces_ols_both <- rbind(get_amces(meeting_ols1, org_level$resp_id, 2:14,
                     lev_labs_ols, att_names, att_lengths), 
                     get_amces(scandal_ols1, org_level$resp_id, 2:14,
                     lev_labs_ols, att_names, att_lengths))
#adding Group variable for ggplot (inside plot_amces)
amces_ols_both$Group <- c(rep("Meeting", 21),
                          rep("No Scandal", 21))
#plotting figure
plot_amces(amces_ols_both, 
           title = "AMCEs for Both Outcome Questions",
           labx = "Organization Selected",
           compare = T)

## Appendix D: Subgroup Analyses ####

### Community Organization Member ####

#Information for Table D1 (table was created used tablesgenerator.com):
table(vendor_long$member_co)
table(vendor_long$member_co)/nrow(vendor_long)
sum(is.na(vendor_long$member_co))
sum(is.na(vendor_long$member_co))/nrow(vendor_long)


#Figure D1 Panel (a):
#differences in marginal means between members and non-members
meeting_mmdiff_member <- mm_diffs(org_level,
                                  meeting_yn ~ capital +
                                     leader_frmr_prof +
                                     funding + 
                                     party,
                                  id = ~ resp_id,
                                  by = ~ member_co,
                                  alpha = .05)
#updating labels to make them more readable
meeting_mmdiff_member <- update_labels(meeting_mmdiff_member, 
                             unlist(lev_labs_ols), att_names, 
                             att_lengths)
#plot of these differences
plot(meeting_mmdiff_member) + theme(legend.position = "none") + 
  scale_colour_grey()


# Figure D1 Panel (b):
#marginal means by members and non-members
meeting_mm_member1 <- cj(org_level, meeting_yn ~ capital +
                                     leader_frmr_prof +
                                     funding + 
                                     party,
                        id = ~ resp_id, 
                        estimate = "mm",
                        by = ~ member_co)
#updating labels to make them more readable
meeting_mm_member1 <- update_labels(meeting_mm_member1,
                                      unlist(lev_labs_ols), att_names,
                                      att_lengths, BY = T, 
                                      BY_labs = c("No",
                                                  "Yes"),
                                    BY_name = "Member")
#plot of marginal means by members and non-members
plot(meeting_mm_member1, group = "Member", vline = 0.5)  + 
  scale_colour_grey()

#Figure D1 Anova Test (in caption):
cj_anova(org_level[!is.na(org_level$member_co), ],
         meeting_yn ~ capital +
                                 leader_frmr_prof +
                                 funding + 
                                 party,
                        id = ~ resp_id, 
                        by = ~ member_co)


#Figure D2 Panel (a):
#difference in marginal means between members and non-members
scandal_mmdiff_member <- mm_diffs(org_level,
                                  scandal_ny ~ capital +
                                     leader_frmr_prof +
                                     funding + 
                                     party,
                                  id = ~ resp_id,
                                  by = ~ member_co,
                                  alpha = .05)
#updating labels to make them more readable
scandal_mmdiff_member <- update_labels(scandal_mmdiff_member, 
                             unlist(lev_labs_ols), att_names, 
                             att_lengths)
#plotting differences
plot(scandal_mmdiff_member) + theme(legend.position = "none")  + 
  scale_colour_grey()


#Figure D2 Panel (b):
#marginal means by members and non-members
scandal_mm_member1 <- cj(org_level, scandal_ny ~ capital +
                                     leader_frmr_prof +
                                     funding + 
                                     party,
                        id = ~ resp_id, 
                        estimate = "mm",
                        by = ~ member_co)
#updating labels to make them more readable
scandal_mm_member1 <- update_labels(scandal_mm_member1,
                                      unlist(lev_labs_ols), att_names,
                                      att_lengths, BY = T, 
                                      BY_labs = c("No",
                                                  "Yes"),
                                    BY_name = "Member")
#plotting these marginal means
plot(scandal_mm_member1, group = "Member", vline = 0.5)  + 
  scale_colour_grey()


#Figure D2 Anova Test (in caption):
cj_anova(org_level[!is.na(org_level$member_co), ],
         scandal_ny ~ capital +
                                 leader_frmr_prof +
                                 funding + 
                                 party,
                        id = ~ resp_id, 
                        by = ~ member_co)


### NGO Apathy ####

#Information for Table D2 (table was created using tablesgenerator.com):
table(vendor_long$apathy_NGO)
table(vendor_long$apathy_NGO)/nrow(vendor_long)
sum(is.na(vendor_long$apathy_NGO))
sum(is.na(vendor_long$apathy_NGO))/nrow(vendor_long)


#Figure D3 Panel (a):
#differences in marginal means between apathys and non-apathys
meeting_mmdiff_apathy <- mm_diffs(org_level,
                                  meeting_yn ~ capital +
                                     leader_frmr_prof +
                                     funding + 
                                     party,
                                  id = ~ resp_id,
                                  by = ~ apathy_fac,
                                  alpha = .05)
#updating labels to make them more readable
meeting_mmdiff_apathy <- update_labels(meeting_mmdiff_apathy, 
                             unlist(lev_labs_ols), att_names, 
                             att_lengths)
#plot of these differences
plot(meeting_mmdiff_apathy) + theme(legend.position = "none")  + 
  scale_colour_grey()


#Figure D3 Panel (b):
#marginal means by apathys and non-apathys
meeting_mm_apathy1 <- cj(org_level, meeting_yn ~ capital +
                                     leader_frmr_prof +
                                     funding + 
                                     party,
                        id = ~ resp_id, 
                        estimate = "mm",
                        by = ~ apathy_fac)
#updating labels to make them more readable
meeting_mm_apathy1 <- update_labels(meeting_mm_apathy1,
                                      unlist(lev_labs_ols), att_names,
                                      att_lengths, BY = T, 
                                      BY_labs = c("Disagree",
                                                  "Agree"),
                                    BY_name = "Apathetic")
#plot of marginal means by apathys and non-apathys
plot(meeting_mm_apathy1, group = "Apathetic", vline = 0.5)  + 
  scale_colour_grey()


#Figure D3 Anova Test (in caption):
cj_anova(org_level[!is.na(org_level$apathy_in), ],
         meeting_yn ~ capital +
                                 leader_frmr_prof +
                                 funding + 
                                 party,
                        id = ~ resp_id, 
                        by = ~ apathy_fac)


#Figure D4 Panel (a):
#difference in marginal means between apathys and non-apathys
scandal_mmdiff_apathy <- mm_diffs(org_level,
                                  scandal_ny ~ capital +
                                     leader_frmr_prof +
                                     funding + 
                                     party,
                                  id = ~ resp_id,
                                  by = ~ apathy_fac,
                                  alpha = .05)
#updating labels to make them more readable
scandal_mmdiff_apathy <- update_labels(scandal_mmdiff_apathy, 
                             unlist(lev_labs_ols), att_names, 
                             att_lengths)
#plotting differences
plot(scandal_mmdiff_apathy) + theme(legend.position = "none")  + 
  scale_colour_grey()


#Figure D4 Panel (b):
#marginal means by apathys and non-apathys
scandal_mm_apathy1 <- cj(org_level, scandal_ny ~ capital +
                                     leader_frmr_prof +
                                     funding + 
                                     party,
                        id = ~ resp_id, 
                        estimate = "mm",
                        by = ~ apathy_fac)
#updating labels to make them more readable
scandal_mm_apathy1 <- update_labels(scandal_mm_apathy1,
                                      unlist(lev_labs_ols), att_names,
                                      att_lengths, BY = T, 
                                      BY_labs = c("Disagree",
                                                  "Agree"),
                                    BY_name = "Apathetic")
#plotting these marginal means
plot(scandal_mm_apathy1, group = "Apathetic", vline = 0.5)  + 
  scale_colour_grey()



#Figure D4 Anova Test (in caption):
cj_anova(org_level[!is.na(org_level$apathy_in), ],
         scandal_ny ~ capital +
                                 leader_frmr_prof +
                                 funding + 
                                 party,
                        id = ~ resp_id, 
                        by = ~ apathy_fac)

## Appendix E: Interaction Analysis Using Causal ANOVA ####

#Analysis:
## first remove observations that have NAs in attributes, otherwise run into 
## difficulty with CausalANOVA

# first find obs where at least one attribue is NA
# for meeting question
nas_m <- org_level %>% select(meeting_yn,
                            leader_frmr_prof, funding, capital, party) %>% 
  complete.cases()

#for scandal question
nas_s <- org_level %>% select(scandal_ny, 
                            leader_frmr_prof, funding, capital, party) %>% 
  complete.cases() 

#dropping all *pairs* where such an issue occurs (importance for differencing)
#also have to relevel variables to make output comparable to above
#first for meeting question
org_level_no_nas_m <- org_level %>% 
  filter(pair_id %nin% (unique(org_level$pair_id[!nas_m]))) %>% 
  mutate(capital = factor(capital, levels = levels(capital)[4:1]),
         leader_frmr_prof = factor(leader_frmr_prof, 
                                   levels = levels(leader_frmr_prof)[6:1]),
         funding = factor(funding, levels = levels(funding)[5:1]),
         party = factor(party, levels = levels(party)[2:1]))
#then for scandal question
org_level_no_nas_s <- org_level %>% 
  filter(pair_id %nin% (unique(org_level$pair_id[!nas_s]))) %>% 
  mutate(capital = factor(capital, levels = levels(capital)[4:1]),
         leader_frmr_prof = factor(leader_frmr_prof, 
                                   levels = levels(leader_frmr_prof)[6:1]),
         funding = factor(funding, levels = levels(funding)[5:1]),
         party = factor(party, levels = levels(party)[2:1]))

### Meeting Analysis -----------------------------------------------------------

#separate into train and test sets for both outcomes
set.seed(1374)
train_ind_m <- sample(unique(org_level_no_nas_m$resp_id), 
                    length(unique(org_level_no_nas_m$resp_id))/2, replace = FALSE)
test_ind_m <- setdiff(unique(org_level_no_nas_m$resp_id), train_ind_m)

#explicitly filtering by train and test respondent ids
org_level_no_nas_train_m <- org_level_no_nas_m %>% filter(resp_id %in% train_ind_m)
org_level_no_nas_test_m <- org_level_no_nas_m %>% filter(resp_id %in% test_ind_m)

#train model
meeting_causalAnova1_train <- CausalANOVA(formula = meeting_yn ~ capital +
                                                   leader_frmr_prof +
                                                   funding +
                                                   party,
                                    data = org_level_no_nas_train_m,
                                    nway = 3,
                                    cluster = org_level_no_nas_train_m$resp_id,
                                    pair.id = org_level_no_nas_train_m$pair_id,
                                    diff = T,
                                    screen = TRUE)

#doing inference with trained model
meeting_causalAnova1 <- test.CausalANOVA(meeting_causalAnova1_train,
                                         newdata = org_level_no_nas_test_m,
                                         pair.id = org_level_no_nas_test_m$pair_id,
                                         cluster = org_level_no_nas_test_m$resp_id,
                                         diff = TRUE)

### Scandal Analysis -----------------------------------------------------------
#separate into train and test sets for both outcomes
set.seed(1374)
train_ind_s <- sample(unique(org_level_no_nas_s$resp_id), 
                    length(unique(org_level_no_nas_s$resp_id))/2, replace = FALSE)
test_ind_s <- setdiff(unique(org_level_no_nas_s$resp_id), train_ind_s)

#explicitly filtering by train and test respondent ids
org_level_no_nas_train_s <- org_level_no_nas_s %>% filter(resp_id %in% train_ind_s)
org_level_no_nas_test_s <- org_level_no_nas_s %>% filter(resp_id %in% test_ind_s)

#train model
scandal_causalAnova1_train <- CausalANOVA(formula = scandal_ny ~ capital +
                                                   leader_frmr_prof +
                                                   funding +
                                                   party,
                                    data = org_level_no_nas_train_s,
                                    nway = 3,
                                    cluster = org_level_no_nas_train_s$resp_id,
                                    pair.id = org_level_no_nas_train_s$pair_id,
                                    diff = T,
                                    screen = TRUE)

#doing inference with trained model
scandal_causalAnova1 <- test.CausalANOVA(scandal_causalAnova1_train,
                                         newdata = org_level_no_nas_test_s,
                                         pair.id = org_level_no_nas_test_s$pair_id,
                                         cluster = org_level_no_nas_test_s$resp_id,
                                         diff = TRUE)

#Table E1:
#part 1 of two-way interaction table for meeting question
causal_anova_tab_meeting_AMIE2_1 <- xtable::xtable(summary(meeting_causalAnova1,
                                                           verbose.full = FALSE,
                        verbose = FALSE)$AMIE2[1:41,],
                        comment = FALSE,
                        caption = "Causal ANOVA Analysis ---- Meeting Question, Pt. 1")
#LaTeX for paper
print(causal_anova_tab_meeting_AMIE2_1,
      include.rownames = F, 
      comment = F,
      size = "scriptsize")


#Table E2
#part 1 of two-way interaction table for meeting question
causal_anova_tab_meeting_AMIE2_2 <- xtable::xtable(summary(meeting_causalAnova1, 
                                                           verbose.full = FALSE,
                        verbose = FALSE)$AMIE2[42:82,],
                        comment = FALSE,
                        caption = "Causal ANOVA Analysis --- Meeting Question, Pt. 2")
#LaTeX for paper
print(causal_anova_tab_meeting_AMIE2_2,
      include.rownames = F, 
      comment = F,
      size = "scriptsize")


#Three-Way Interaction (table not included in main paper to save space)
# to produce three-way interaction table for meeting question for those curious
causal_anova_tab_meeting_AMIE3 <- xtable::xtable(summary(meeting_causalAnova1, 
                                                           verbose.full = FALSE,
                        verbose = FALSE)$AMIE3,
                        comment = FALSE,
                        caption = "Causal ANOVA Analysis --- Meeting Question (Three-Way Interaction")
#LaTeX for paper
print(causal_anova_tab_meeting_AMIE3,
      include.rownames = F, 
      comment = F,
      size = "scriptsize")
#how many possible effects are there
(summary(meeting_causalAnova1, verbose = F)$AMIE3 %>% 
    nrow) - 1 # -1 because of baseline level 
#seeing how many significant effects there are out of all possible effects
(summary(meeting_causalAnova1, verbose = F)$AMIE3 %>%
  filter((`2.5%CI` > 0 & `97.5%CI` > 0) | (`2.5%CI` < 0 & `97.5%CI` < 0) | base == "***") %>% 
  nrow) - 1 # - 1 because there is a baseline level
# not a lot of confidence that these aren't spurious


#Table E3:
#two-way interaction table for scandal question
causal_anova_tab_scandal <- xtable::xtable(summary(scandal_causalAnova1, verbose.full = FALSE,
                        verbose = FALSE)$AMIE2,
                        comment = FALSE,
                        caption = "Causal ANOVA Analysis --- Scandal Question")
#LaTeX for paper
print(causal_anova_tab_scandal,
      include.rownames = F, 
      comment = F,
      size = "scriptsize")

## Appendix F: Robustness Checks ####

### OLS Models with Fixed Effects ####

#Table F1:
# first fit models
#meeting
meeting_ols1_fe <- lm(data = org_level, meeting_yn ~ capital +
                                                   leader_frmr_prof +
                                                   funding +
                                                   party +
                                                   market +
                                                   enum)
#scandal
scandal_ols1_fe <- lm(data = org_level, scandal_ny ~ capital +
                                                   leader_frmr_prof +
                                                   funding +
                                                   party + 
                                                   market +
                                                   enum)
#LaTeX Code for table in paper:
stargazer(meeting_ols1_fe,
          scandal_ols1_fe,
          se = list(calc.ses.cluster(meeting_ols1_fe, data = org_level),
                    calc.ses.cluster(scandal_ols1_fe, data = org_level)),
          header = FALSE,
          t.auto = TRUE,
          keep = 0:14,
          intercept.bottom = FALSE,
          covariate.labels = regress_tab_labels,
          star.cutoffs = c(.05, NA, NA),
          notes = c("*: $p<0.05$"),
          single.row = T,
          font.size = "footnotesize",
          label = "app:tab:models_ols:fe",
          title = "\\footnotesize Linear Probability Models with Fixed Effects By Market and Enumerator. Baseline for `Founded' variable is \\textit{District Capital}. Baseline for `Leader's Former Profession' variable is \\textit{Vendor}. Baseline for `Organization's Funding Is From' variable is \\textit{Contributions from Malawian Citizens}. Baseline for `Organization's Party Connections' variable is \\textit{Independent}.")


### Logit Models ####

#Table F2:
#fitting models without enumerator fixed effects:
# meeting question
meeting_log1 <- glm(data = org_level, meeting_yn ~ capital +
                                                   leader_frmr_prof +
                                                   funding +
                                                   party, 
                    family = binomial)
# scandal question
scandal_log1 <- glm(data = org_level, scandal_ny ~ capital +
                                                   leader_frmr_prof +
                                                   funding +
                                                   party,
                    family = binomial)
#fitting models with enumerator fixed effects:
# meeting question
meeting_log1_fe <- glm(data = org_level, meeting_yn ~ capital +
                                                   leader_frmr_prof +
                                                   funding +
                                                   party +
                                                   market +
                                                   enum, 
                    family = binomial)
# scandal question
scandal_log1_fe <- glm(data = org_level, scandal_ny ~ capital +
                                                   leader_frmr_prof +
                                                   funding +
                                                   party +
                                                   market +
                                                   enum,
                    family = binomial)
#LaTex code for table:
stargazer(meeting_log1, 
          meeting_log1_fe, 
          scandal_log1, 
          scandal_log1_fe, 
          se = list(calc.ses.cluster(meeting_log1),
                    calc.ses.cluster(meeting_log1_fe),
                    calc.ses.cluster(scandal_log1),
                    calc.ses.cluster(scandal_log1_fe)),
          header = FALSE,
          t.auto = TRUE,
          keep = 0:14,
          covariate.labels = regress_tab_labels,
          intercept.bottom = FALSE,
          star.cutoffs = c(.05,NA,NA),
          notes = c("*: $p<0.05$"),
          font.size = "scriptsize",
          label = "app:tab:models_logit",
          title = "Regression Output for Logit Models without (1, 3), and with (2, 4) Fixed Effects, for Both Outcome Variables")


#Figure F1 Panel (a):
#set seed because of using Monte Carlo simulation to calculate confidence intervals
set.seed(1034)
#plot data for meeting_log1
meeting_log1_plot <- plot.conjoint.default(meeting_log1)
#plot data for scandal_log1
scandal_log1_plot <- plot.conjoint.default(scandal_log1)
#combining both together
both_log_table <- rbind(meeting_log1_plot$effects, scandal_log1_plot$effects)
both_log_table$Group <- rep(c("Meeting", "Scandal"), each = 17)
both_log_plot <- plot.conjoint2(both_log_table, lev_labs2, "Meeting")
#print plot
both_log_plot


#Figure F1 Panel (b):
#set seed because of using Monte Carlo simulation to calculate confidence intervals
set.seed(1374)
#plot data for meeting_log1
meeting_log1_fe_plot <- plot.conjoint.default(meeting_log1_fe)
#plot data for scandal_log1
scandal_log1_fe_plot <- plot.conjoint.default(scandal_log1_fe)
#combining both together
both_log_fe_table <- rbind(meeting_log1_fe_plot$effects, 
                            scandal_log1_fe_plot$effects)
both_log_fe_table$Group <- rep(c("Meeting", "Scandal"), each = 17)
both_log_fe_plot <- plot.conjoint2(both_log_fe_table, lev_labs2, "Meeting")
#print plot
both_log_fe_plot

### Broken Down By Pair ####

#### OLS ####

#Table F3:
#fitting models on each question for each pair
meeting_ols1_pair1 <- lm(data = org_level, meeting_yn ~ capital +
                                                   leader_frmr_prof +
                                                   funding +
                                                   party,
                         subset = pair_num == 1)
scandal_ols1_pair1 <- lm(data = org_level, scandal_ny ~ capital +
                                                   leader_frmr_prof +
                                                   funding +
                                                   party,
                         subset = pair_num == 1)
meeting_ols1_pair2 <- lm(data = org_level, meeting_yn ~ capital +
                                                   leader_frmr_prof +
                                                   funding +
                                                   party,
                         subset = pair_num == 2)
scandal_ols1_pair2 <- lm(data = org_level, scandal_ny ~ capital +
                                                   leader_frmr_prof +
                                                   funding +
                                                   party,
                         subset = pair_num == 2)
# LaTeX for table:
stargazer(meeting_ols1_pair1,
          meeting_ols1_pair2,
          scandal_ols1_pair1,
          scandal_ols1_pair2,
          se = list(calc.ses.cluster(meeting_ols1_pair1, 
                                     data = org_level[org_level$pair_num == 1,]),
                    calc.ses.cluster(meeting_ols1_pair2, 
                                     data = org_level[org_level$pair_num == 2,]),
                    calc.ses.cluster(scandal_ols1_pair2, 
                                     data = org_level[org_level$pair_num == 1,]),
                    calc.ses.cluster(scandal_ols1_pair2, 
                                     data = org_level[org_level$pair_num == 2,])),
          header = FALSE,
          t.auto = TRUE,
          keep = 0:14,
          column.labels = c("Pair 1", "Pair 2", "Pair 1", "Pair 2"),
          intercept.bottom = FALSE,
          covariate.labels = regress_tab_labels,
          star.cutoffs = c(.05, NA, NA),
          notes = c("*: $p<0.05$"),
          font.size = "scriptsize",
          label = "app:tab:models_ols:pairs",
          column.sep.width = "0pt",
          omit.stat = "f",
          title = "\\scriptsize Linear Probability Models By Pair for Both Outcomes. Baseline for `Founded' variable is \\textit{District Capital}. Baseline for `Leader's Former Profession' variable is \\textit{Vendor}. Baseline for `Organization's Funding Is From' variable is \\textit{Contributions from Malawian Citizens}. Baseline for `Organization's Party Connections' variable is \\textit{Independent}.")


#Figure F2 Panel (a):
##Meeting
#binding together the amces for each pair for meeting question
amces_ols_meeting_pair <- rbind(get_amces(meeting_ols1_pair1, 
                                          org_level$resp_id[org_level$pair_num == 1],
                                          2:14,
                                          lev_labs_ols, att_names, att_lengths), 
                                get_amces(meeting_ols1_pair2, 
                                          org_level$resp_id[org_level$pair_num == 2],
                                          2:14,
                                          lev_labs_ols, att_names, att_lengths))
#making Group variable for plotting purposes
amces_ols_meeting_pair$Group <- c(rep("Pair 1", 21),
                          rep("Pair 2", 21))
#plotting
plot_amces(amces_ols_meeting_pair, 
           title = "AMCEs by Pair for Meeting Question",
           labx = "Organization Selected",
           compare = T)


#Figure F2 Panel (b):
##Scandal
#binding together the amces for each pair for scandal question
amces_ols_scandal_pair <- rbind(get_amces(scandal_ols1_pair1, 
                                          org_level$resp_id[org_level$pair_num == 1], 
                                          2:14,
                                          lev_labs_ols, att_names, att_lengths), 
                                get_amces(scandal_ols1_pair2,
                                          org_level$resp_id[org_level$pair_num == 2],
                                          2:14,
                                          lev_labs_ols, att_names, att_lengths))
#making Group variable for plotting purposes
amces_ols_scandal_pair$Group <- c(rep("Pair 1", 21),
                          rep("Pair 2", 21))
#plotting
plot_amces(amces_ols_scandal_pair, 
           title = "AMCEs by Pair for Scandal Question",
           labx = "Organization Selected",
           compare = T)

#### Marginal Means ####

#Figure F3 Panel (a):
#differences in marginal means between pair 1 and pair 2
meeting_mmdiff_pair <- mm_diffs(org_level,
                                  meeting_yn ~ capital +
                                     leader_frmr_prof +
                                     funding + 
                                     party,
                                  id = ~ resp_id,
                                  by = ~ pair_fac,
                                alpha = .05)
#updating labels for readability purposes
meeting_mmdiff_pair <- update_labels(meeting_mmdiff_pair, 
                             unlist(lev_labs_ols), att_names, 
                             att_lengths)
#plot of these differences
plot(meeting_mmdiff_pair) + theme(legend.position = "none")  + 
  scale_colour_grey()


#Figure F3 Panel (b):
#marginal means by pair 1 and pair 2
meeting_mm_pair <- cj(org_level, meeting_yn ~ capital +
                                     leader_frmr_prof +
                                     funding + 
                                     party,
                        id = ~ resp_id, 
                        estimate = "mm",
                        by = ~ pair_fac)
#updating labels for readability purposes
meeting_mm_pair <- update_labels(meeting_mm_pair,
                                      unlist(lev_labs_ols), att_names,
                                      att_lengths, BY = T, 
                                      BY_labs = c("Pair 1",
                                                  "Pair 2"),
                                    BY_name = "Pair")

#plot of marginal means by pair 1 and pair 2
plot(meeting_mm_pair, group = "Pair", vline = 0.5)  + 
  scale_colour_grey()


#Figure F3 Anova Test (in caption):
cj_anova(org_level[!is.na(org_level$meeting), ],
         meeting_yn ~ capital +
                                 leader_frmr_prof +
                                 funding + 
                                 party,
                        id = ~ resp_id, 
                        by = ~ pair_num)


#Figure F4 Panel (a):
#differences in marginal means between members and non-members
scandal_mmdiff_pair <- mm_diffs(org_level,
                                  scandal_ny ~ capital +
                                     leader_frmr_prof +
                                     funding + 
                                     party,
                                  id = ~ resp_id,
                                  by = ~ pair_fac,
                                alpha = .05)
#updating labels for readability purposes
scandal_mmdiff_pair <- update_labels(scandal_mmdiff_pair, 
                             unlist(lev_labs_ols), att_names, 
                             att_lengths)
#plot of these differences
plot(scandal_mmdiff_pair) + theme(legend.position = "none")  + 
  scale_colour_grey()


#Figure F4 Panel (b):
#marginal means by members and non-members
scandal_mm_pair <- cj(org_level, scandal_ny ~ capital +
                                     leader_frmr_prof +
                                     funding + 
                                     party,
                        id = ~ resp_id, 
                        estimate = "mm",
                        by = ~ pair_fac)
#updating labels for readability purposes
scandal_mm_pair <- update_labels(scandal_mm_pair,
                                      unlist(lev_labs_ols), att_names,
                                      att_lengths, BY = T, 
                                      BY_labs = c("Pair 1",
                                                  "Pair 2"),
                                    BY_name = "Pair")
#plot of marginal means by members and non-members
plot(scandal_mm_pair, group = "Pair", vline = 0.5)  + 
  scale_colour_grey()


#Figure F4 Anova Test (in caption):
cj_anova(org_level[!is.na(org_level$scandal), ],
         scandal_ny ~ capital +
                                 leader_frmr_prof +
                                 funding + 
                                 party,
                        id = ~ resp_id, 
                        by = ~ pair_num)

## Appendix G: Summary Statistics ####

# summary stats table object
sum_stats <- rbind(vendor_long %>% 
  select(female) %>% 
  summarise_all(list(~mean(., na.rm = T),
                     ~sd(., na.rm = T),
                     ~min(., na.rm = T),
                     ~max(., na.rm = T))),
  vendor_long %>% 
  select(age) %>% 
  summarise_all(list(~mean(., na.rm = T),
                     ~sd(., na.rm = T),
                     ~min(., na.rm = T),
                     ~max(., na.rm = T))),
  vendor_long %>% 
  select(literacy_any) %>% 
  summarise_all(list(~mean(., na.rm = T),
                     ~sd(., na.rm = T),
                     ~min(., na.rm = T),
                     ~max(., na.rm = T))),
  vendor_long %>% 
  select(educ_cat) %>% 
  summarise_all(list(~mean(., na.rm = T),
                     ~sd(., na.rm = T),
                     ~min(., na.rm = T),
                     ~max(., na.rm = T))),
  vendor_long %>% 
  select(hh_income_trim_99) %>% 
  summarise_all(list(~mean(., na.rm = T),
                     ~sd(., na.rm = T),
                     ~min(., na.rm = T),
                     ~max(., na.rm = T))),
  vendor_long %>% 
  select(service) %>% 
  summarise_all(list(~mean(., na.rm = T),
                     ~sd(., na.rm = T),
                     ~min(., na.rm = T),
                     ~max(., na.rm = T))),
  vendor_long %>% 
  select(sell_daily) %>% 
  summarise_all(list(~mean(., na.rm = T),
                     ~sd(., na.rm = T),
                     ~min(., na.rm = T),
                     ~max(., na.rm = T))),
  vendor_long %>% 
  select(yrs_in_mkt_fix) %>% 
  summarise_all(list(~mean(., na.rm = T),
                     ~sd(., na.rm = T),
                     ~min(., na.rm = T),
                     ~max(., na.rm = T))),
  vendor_long %>% 
  select(intend_vote) %>% 
  summarise_all(list(~mean(., na.rm = T),
                     ~sd(., na.rm = T),
                     ~min(., na.rm = T),
                     ~max(., na.rm = T))),
  vendor_long %>% 
  select(registered_vt) %>% 
  summarise_all(list(~mean(., na.rm = T),
                     ~sd(., na.rm = T),
                     ~min(., na.rm = T),
                     ~max(., na.rm = T)))
)
# adding a variable column
sum_stats <- sum_stats %>% 
  mutate(Variable = c("Female",
                      "Age",
                      "Literacy",
                      "Education",
                      "Household Income",
                      "Service Stall",
                      "Sells Daily",
                      "Years in Market",
                      "Intends to Vote",
                      "Registered to Vote")) %>% 
  select(Variable, everything())
# LaTeX code for table
print(xtable::xtable(sum_stats, header = F,
          caption = "Summary Statistics for Sample"),
      include.rownames = F, comment = F)
